## Callin Switzer
## 17 Nov 2017
## Multilevel model to visualize bees'
## behavior on the artificial pollen system

#install packages
ipak <- function(pkg){
     new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
     if(length(new.pkg)) install.packages(new.pkg, dependencies = TRUE)
     sapply(pkg, require, character.only = TRUE)
}

packages <- c("ggplot2", "reshape2", 'lme4', 'sjPlot', "multcomp", "plyr", "effects")
ipak(packages)
## Loading required package: ggplot2
## Loading required package: reshape2
## Loading required package: lme4
## Loading required package: Matrix
## Loading required package: sjPlot
## Warning in checkMatrixPackageVersion(): Package version inconsistency detected.
## TMB was built with Matrix version 1.2.10
## Current Matrix version is 1.2.11
## Please re-install 'TMB' from source or restore original 'Matrix' package
## Loading required package: multcomp
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## Loading required package: MASS
## 
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
## 
##     geyser
## Loading required package: plyr
## Loading required package: effects
## Loading required package: carData
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
##  ggplot2 reshape2     lme4   sjPlot multcomp     plyr  effects 
##     TRUE     TRUE     TRUE     TRUE     TRUE     TRUE     TRUE
# set ggplot theme
theme_set(theme_bw())

# define data and figure directories
dataDir <- "/Users/cswitzer/Dropbox/SonicationBehavior/SonBehData"
figDir <- "/Users/cswitzer/Dropbox/SonicationBehavior/SonBehFigs"

print(paste("last run ", Sys.time()))
## Warning in as.POSIXlt.POSIXct(x, tz): unknown timezone 'zone/tz/2017c.1.0/
## zoneinfo/America/Los_Angeles'
## [1] "last run  2017-12-08 20:35:54"
print(R.version)
##                _                           
## platform       x86_64-apple-darwin15.6.0   
## arch           x86_64                      
## os             darwin15.6.0                
## system         x86_64, darwin15.6.0        
## status                                     
## major          3                           
## minor          4.2                         
## year           2017                        
## month          09                          
## day            28                          
## svn rev        73368                       
## language       R                           
## version.string R version 3.4.2 (2017-09-28)
## nickname       Short Summer

Read in data and double check it

sl <- read.csv(file.path(dataDir, '01_CombinedTrials_cleaned.csv'))
head(sl)
##   beeCol hive meanFreq IT_imputed index freq     amp
## 1   blue    4 395.2778       3.61     1  400 0.06328
## 2   blue    4 395.2778       3.61     2  360 0.34299
## 3   blue    4 395.2778       3.61     3  430 0.44099
## 4   blue    4 395.2778       3.61     4  420 0.16841
## 5   blue    4 395.2778       3.61     5  420 0.15284
## 6   blue    4 395.2778       3.61     6  410 0.18302
##                    datetime rewNum rewTF lowRewAmp highrewAmp
## 1  2016_11_22__08_23_47_721      1     T         0          5
## 2  2016_11_22__08_23_49_677      2     T         0          5
## 3  2016_11_22__08_23_50_139      3     T         0          5
## 4  2016_11_22__08_24_10_938      4     T         0          5
## 5  2016_11_22__08_24_18_013      5     T         0          5
## 6  2016_11_22__08_24_18_464      6     T         0          5
##                         BeeNumCol
## 1 BeeBlue_22Nov2016_Hive4_initial
## 2 BeeBlue_22Nov2016_Hive4_initial
## 3 BeeBlue_22Nov2016_Hive4_initial
## 4 BeeBlue_22Nov2016_Hive4_initial
## 5 BeeBlue_22Nov2016_Hive4_initial
## 6 BeeBlue_22Nov2016_Hive4_initial
##                                     accFile trialNum
## 1 2016_11_22__08_23_47_721_220_450_test.txt        1
## 2 2016_11_22__08_23_49_677_220_450_test.txt        1
## 3 2016_11_22__08_23_50_139_220_450_test.txt        1
## 4 2016_11_22__08_24_10_938_220_450_test.txt        1
## 5 2016_11_22__08_24_18_013_220_450_test.txt        1
## 6 2016_11_22__08_24_18_464_220_450_test.txt        1
##                 datetime_str lowFrq highFrq   IT  trt
## 1 2016-11-22 08:23:47.721000    220     450 3.61 full
## 2 2016-11-22 08:23:49.677000    220     450 3.61 full
## 3 2016-11-22 08:23:50.139000    220     450 3.61 full
## 4 2016-11-22 08:24:10.938000    220     450 3.61 full
## 5 2016-11-22 08:24:18.013000    220     450 3.61 full
## 6 2016-11-22 08:24:18.464000    220     450 3.61 full
dim(sl) # should be 24303, 20
## [1] 24303    20
table(sl$hive, useNA = 'always')
## 
##     3     4     5  <NA> 
##   513  1047 22743     0
## make sure all bee colors are lowercase
sl$beeCol <- tolower(sl$beeCol)

# make sure there are values lower than 220 and higher than 450 
# (the cutoff for buzzes used in the experiment)
hist(sl$freq, breaks = seq(215, 450, by = 10))

sl[sl$freq < 220 | sl$freq > 450,] # should have 0 rows
##  [1] beeCol       hive         meanFreq     IT_imputed   index       
##  [6] freq         amp          datetime     rewNum       rewTF       
## [11] lowRewAmp    highrewAmp   BeeNumCol    accFile      trialNum    
## [16] datetime_str lowFrq       highFrq      IT           trt         
## <0 rows> (or 0-length row.names)
# look at treatments
xtabs(~sl$beeCol+ trt, data = sl )
##                   trt
## sl$beeCol          full high  low
##   blue               36    0    0
##   gold               54    0    0
##   goldred             3    0    0
##   green              13    0    0
##   lime               28    0    0
##   limeblue          530    0    0
##   limegold           54    0    0
##   limegreen          50    0    0
##   limeorange         84    0    0
##   limepink           51    0    0
##   limepurple        109    0  243
##   limepurpleyellow   58    0    0
##   limered            50    0 3424
##   limesilver          3    0    0
##   limewhite         101    0    0
##   limeyellow         52    0    0
##   orange             85  416    0
##   orangeblue         50    0    0
##   orangegreen        50    0    0
##   orangepink         50    0    0
##   orangepurple       50    0    0
##   purple              9    0    0
##   redblue           673    0    0
##   redgreen          530    0    0
##   redpink            56    0  621
##   redpurple          55  163    0
##   silver             26    0    0
##   white             283    0    0
##   whiteblue          56    0 2730
##   whitegold          48    0 1177
##   whitegreen         39    0    0
##   whiteorange        54    0 1068
##   whitepink         135 1049    0
##   whitepurple        55    0    0
##   whitered           59 1285    0
##   whiteyellow       157    0    0
##   yellowblue         54 1533    0
##   yellowgreen       532    0    0
##   yelloworange       54 2059    0
##   yellowpink         58    0 2251
##   yellowpurple       54    0 1189
##   yellowred         547    0    0
# find percentage reward by treatment
mean(grepl("[tT]", as.character(sl$rewTF))) # overall mean
## [1] 0.4155454
# percentage that were rewarded by treatment
tapply((grepl("[tT]", as.character(sl$rewTF))), INDEX= sl$trt, mean)
##      full      high       low 
## 1.0000000 0.3535742 0.2128631
# total number of trials for each treatment
tapply((grepl("[tT]", as.character(sl$rewTF))), INDEX= sl$trt, length)
##  full  high   low 
##  5095  6505 12703
# total number of rewards per treatment
tapply((grepl("[tT]", as.character(sl$rewTF))), INDEX= sl$trt, FUN = function(x) sum(x))
## full high  low 
## 5095 2300 2704
# total number of trials that were unrewarded per treatment
tapply((grepl("[tT]", as.character(sl$rewTF))), INDEX= sl$trt, FUN = function(x) sum(!x))
## full high  low 
##    0 4205 9999

Visualizations and modeling

# calculate trial averages and plot
sl$colNum = paste(sl$beeCol, sl$hive, sep = "_")
sl$trt <- as.character(sl$trt)
sl$trt[sl$trt == "full" & sl$trialNum >1 ] <- "full_2"


aggdata <- aggregate(sl$freq, by=list(colNum = sl$colNum,trialNum =sl$trialNum, trt = sl$trt), FUN=mean, na.rm=TRUE)
colnames(aggdata)[colnames(aggdata) == "x"] = "freq"

aggdata_sd <- aggregate(sl$freq, by=list(colNum = sl$colNum,trialNum =sl$trialNum, trt = sl$trt), FUN=sd, na.rm=TRUE)
colnames(aggdata_sd)[colnames(aggdata_sd) == "x"] = "freq_sd"

aggdata = merge(aggdata, aggdata_sd)
#aggdata$trt = sapply(1:nrow(aggdata), FUN = function(x){sl[sl$colNum == aggdata$colNum[x] & sl$trialNum == aggdata$trialNum[x], "trt"][1]})
aggdata = aggdata[order(aggdata$colNum, aggdata$trialNum, decreasing = FALSE), ]
agg_sm = aggdata[aggdata$trialNum <= 2, ]
rownames(agg_sm) = 1:nrow(agg_sm)
agg_sm
##                colNum trialNum    trt     freq  freq_sd
## 1              blue_4        1   full 395.2778 39.38717
## 2              gold_3        1   full 348.3333 37.15089
## 3           goldred_4        1   full 406.6667 32.14550
## 4             green_4        1   full 318.8889 57.32461
## 5             green_4        2 full_2 285.0000 59.16080
## 6              lime_5        1   full 374.6429 36.36055
## 7          limeblue_5        1   full 302.4000 34.73280
## 8          limeblue_5        2 full_2 299.8148 39.02059
## 9          limegold_5        1   full 344.6296 44.07168
## 10        limegreen_5        1   full 370.8000 29.40498
## 11       limeorange_5        1   full 285.7576 35.26953
## 12       limeorange_5        2 full_2 344.5098 26.70683
## 13         limepink_5        1   full 308.4314 43.23760
## 14       limepurple_5        1   full 352.2642 38.66232
## 15       limepurple_5        2    low 344.2798 32.55948
## 16 limepurpleyellow_5        1   full 336.7241 50.76219
## 17          limered_5        1   full 354.6000 37.91559
## 18          limered_5        2    low 365.3381 36.20344
## 19       limesilver_5        1   full 306.6667 11.54701
## 20        limewhite_5        1   full 292.0000 44.90352
## 21        limewhite_5        2 full_2 327.0588 35.51305
## 22       limeyellow_5        1   full 339.4231 39.37818
## 23           orange_3        1   full 388.5294 38.22837
## 24           orange_5        1   full 345.0980 33.60789
## 25       orangeblue_5        1   full 301.4000 45.17585
## 26      orangegreen_5        1   full 296.2000 40.45103
## 27       orangepink_5        1   full 286.6000 27.15113
## 28     orangepurple_5        1   full 329.0000 25.65469
## 29           purple_3        1   full 382.2222 21.08185
## 30          redblue_4        1   full 342.2449 40.53113
## 31          redblue_4        2 full_2 335.6667 51.89189
## 32         redgreen_5        1   full 334.7059 34.19666
## 33         redgreen_5        2 full_2 314.5283 31.53545
## 34          redpink_5        1   full 288.3929 41.06654
## 35          redpink_5        2    low 275.3448 34.24450
## 36        redpurple_5        1   full 311.4545 31.64726
## 37        redpurple_5        2   high 336.4444 40.75995
## 38           silver_5        1   full 305.3846 38.39070
## 39            white_4        1   full 343.1250 47.49720
## 40            white_4        2 full_2 342.2388 38.95689
## 41        whiteblue_5        1   full 318.2143 27.17667
## 42        whiteblue_5        2    low 351.6242 29.13402
## 43        whitegold_5        1   full 315.2083 34.20772
## 44        whitegold_5        2    low 307.2727 45.76009
## 45       whitegreen_4        1   full 300.7692 33.43372
## 46      whiteorange_5        1   full 324.4444 51.71280
## 47      whiteorange_5        2    low 355.2308 44.74234
## 48        whitepink_5        1   full 336.8750 42.68285
## 49        whitepink_5        2   high 323.9161 28.55770
## 50      whitepurple_5        1   full 328.1818 43.50781
## 51         whitered_5        1   full 288.8136 36.05956
## 52         whitered_5        2   high 304.6667 42.10035
## 53      whiteyellow_5        1   full 303.4545 39.07344
## 54      whiteyellow_5        2 full_2 312.5532 36.56252
## 55       yellowblue_5        1   full 313.7037 44.86021
## 56       yellowblue_5        2   high 327.9688 49.76099
## 57      yellowgreen_5        1   full 298.0392 40.64577
## 58      yellowgreen_5        2 full_2 300.6000 27.58364
## 59     yelloworange_5        1   full 268.1481 36.18996
## 60     yelloworange_5        2   high 313.9205 39.62646
## 61       yellowpink_5        1   full 323.6207 39.98676
## 62       yellowpink_5        2    low 335.7042 41.00507
## 63     yellowpurple_5        1   full 354.4444 36.37678
## 64     yellowpurple_5        2    low 373.0058 34.17237
## 65        yellowred_5        1   full 320.1754 41.85396
## 66        yellowred_5        2 full_2 333.0357 29.96481
agg_sm[duplicated(data.frame(agg_sm$colNum, agg_sm$trialNum)), ]
## [1] colNum   trialNum trt      freq     freq_sd 
## <0 rows> (or 0-length row.names)
ggplot(agg_sm, aes(x = trt, y = freq, fill = trialNum > 1)) +
  geom_boxplot(alpha = 0.2) + 
  geom_point(aes(color = trialNum>1)) + 
  geom_line(aes(group = colNum))

diffdf <- sapply(unique(agg_sm$colNum), FUN = function(x){
  tmp = agg_sm[agg_sm$colNum == x, ]
  if(nrow(tmp) <= 1)
    diff = NA
  else
    diff = tmp$freq[tmp$trialNum == 2] - tmp$freq[tmp$trialNum == 1]
  return(diff)
})

trtDF = sapply(unique(agg_sm$colNum), FUN = function(x){
  tmp = agg_sm[agg_sm$colNum == x, ]
  ttrs = paste(tmp$trt[tmp$trialNum == 1], tmp$trt[tmp$trialNum == 2], sep = "_")
  return(ttrs)
})

buzzdiffs = data.frame(trtDF, diffdf)

ggplot(buzzdiffs, aes(x = trtDF, y= diffdf)) + 
  geom_boxplot() + 
  geom_point()
## Warning: Removed 20 rows containing non-finite values (stat_boxplot).
## Warning: Removed 20 rows containing missing values (geom_point).

agg2 = aggregate(sl$freq, by=list(colNum = sl$colNum, fullTrt = sl$trt == "full", trt = sl$trt), FUN=mean, na.rm=TRUE)
colnames(agg2)[colnames(agg2) == "x"] = "freq"
agg2$trt = as.character(agg2$trt)

diffdf <- t(as.data.frame(t(sapply(unique(agg2$colNum), FUN = function(x){
  tmp = agg2[agg2$colNum == x, ]
  if(nrow(tmp) <= 1)
    return(NA)
  if (length(unique(tmp$trt)) > 2){
    tmp = tmp[tmp$trt != "full_2", ]
  }
  diff =  tmp$freq[!tmp$fullTrt] - tmp$freq[tmp$fullTrt]
  return(diff)

}))))

length(diffdf)
## [1] 43
trtDF = sapply(unique(agg2$colNum), FUN = function(x){
  tmp = agg2[agg2$colNum == x, ]
  if (length(unique(tmp$trt)) > 2){
    tmp = tmp[tmp$trt != "full_2", ]
  }
  ttrs = paste(tmp$trt[tmp$fullTrt], tmp$trt[!tmp$fullTrt], sep = "_")
  return(ttrs)
})

length(trtDF)
## [1] 43
buzzdiffs = data.frame(trtDF, diffdf)
tapply(buzzdiffs$diffdf, INDEX = buzzdiffs$trtDF, mean)
##       full_ full_full_2   full_high    full_low 
##          NA    4.851689   13.209001   14.307123
ggplot(droplevels(buzzdiffs[buzzdiffs$trtDF != "full_", ]), aes(x = trtDF, y= as.numeric(diffdf))) + 
  geom_boxplot() + 
  geom_point()

m1 = lm(as.numeric(diffdf) ~ trtDF - 1, data = droplevels(buzzdiffs[buzzdiffs$trtDF != "full_", ]))
summary(m1)
## 
## Call:
## lm(formula = as.numeric(diffdf) ~ trtDF - 1, data = droplevels(buzzdiffs[buzzdiffs$trtDF != 
##     "full_", ]))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -38.741 -17.992  -3.556  15.767  53.901 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)
## trtDFfull_full_2    4.852      7.488   0.648    0.524
## trtDFfull_high     13.209      9.667   1.366    0.186
## trtDFfull_low      14.307      8.372   1.709    0.102
## 
## Residual standard error: 23.68 on 21 degrees of freedom
## Multiple R-squared:  0.1987, Adjusted R-squared:  0.08422 
## F-statistic: 1.736 on 3 and 21 DF,  p-value: 0.1904
sl$trt = relevel(factor(sl$trt), ref = "full")


sl$trt2 = mapvalues(sl$trt, from = c("full", "full_2", "high", "low"), 
                    to = c("full", "full", "high", "low"))


# summary for paper
# fit a varying slope and intercept for colNum (bee ID), and allow the slope of the trialNum
# variable to vary by colNum (beeID)
m2 = lmer(freq ~ trt2 + as.factor(hive) + trialNum + IT_imputed + (1+trialNum|colNum), data = sl, REML = FALSE)
summary(m2)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: freq ~ trt2 + as.factor(hive) + trialNum + IT_imputed + (1 +  
##     trialNum | colNum)
##    Data: sl
## 
##       AIC       BIC    logLik  deviance  df.resid 
##  245997.4  246086.5 -122987.7  245975.4     24292 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.0287 -0.5129  0.1700  0.6809  3.9087 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  colNum   (Intercept)  898.7   29.98         
##           trialNum     102.8   10.14    -0.75
##  Residual             1438.7   37.93         
## Number of obs: 24303, groups:  colNum, 43
## 
## Fixed effects:
##                  Estimate Std. Error t value
## (Intercept)       495.057     42.930  11.532
## trt2high           10.980      2.286   4.803
## trt2low            17.939      1.910   9.391
## as.factor(hive)4  -33.318     16.394  -2.032
## as.factor(hive)5  -39.802     13.910  -2.861
## trialNum            1.491      2.038   0.732
## IT_imputed        -32.344     10.232  -3.161
## 
## Correlation of Fixed Effects:
##             (Intr) trt2hg trt2lw as.()4 as.()5 trilNm
## trt2high     0.065                                   
## trt2low     -0.014  0.011                            
## as.fctr(h)4 -0.350  0.029  0.003                     
## as.fctr(h)5 -0.175  0.031 -0.032  0.762              
## trialNum     0.008 -0.046 -0.041 -0.021 -0.029       
## IT_imputed  -0.948 -0.079  0.018  0.105 -0.125 -0.085
# this model estimates a global intercept
# random effect intercept for colNum (beeID)
# a single global estimate for trialNum
# the effect of trialNum within each level of colNum
# the correlation between intercept of trialNum across levels of colNum


m2_1 = lmer(freq ~ trt2* IT_imputed + as.factor(hive) + trialNum  + (1+trialNum|colNum), data = sl, REML = FALSE)
summary(m2_1)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: freq ~ trt2 * IT_imputed + as.factor(hive) + trialNum + (1 +  
##     trialNum | colNum)
##    Data: sl
## 
##       AIC       BIC    logLik  deviance  df.resid 
##  245984.9  246090.2 -122979.4  245958.9     24290 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.0266 -0.5157  0.1730  0.6816  3.8948 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  colNum   (Intercept)  900.9   30.02         
##           trialNum     102.7   10.13    -0.77
##  Residual             1437.8   37.92         
## Number of obs: 24303, groups:  colNum, 43
## 
## Fixed effects:
##                     Estimate Std. Error t value
## (Intercept)          513.024     42.340  12.117
## trt2high              -7.716     42.268  -0.183
## trt2low              -66.114     20.819  -3.176
## IT_imputed           -36.663     10.078  -3.638
## as.factor(hive)4     -35.026     16.146  -2.169
## as.factor(hive)5     -39.992     13.708  -2.917
## trialNum               1.448      2.023   0.716
## trt2high:IT_imputed    4.160      9.344   0.445
## trt2low:IT_imputed    20.713      5.109   4.054
## 
## Correlation of Fixed Effects:
##             (Intr) trt2hg trt2lw IT_mpt as.()4 as.()5 trilNm trt2h:IT_
## trt2high    -0.084                                                    
## trt2low     -0.101  0.006                                             
## IT_imputed  -0.948  0.058  0.106                                      
## as.fctr(h)4 -0.353  0.091  0.011  0.108                               
## as.fctr(h)5 -0.180  0.093 -0.009 -0.119  0.765                        
## trialNum     0.012 -0.040  0.000 -0.091 -0.025 -0.033                 
## trt2hgh:IT_  0.088 -0.999 -0.006 -0.062 -0.090 -0.092  0.037          
## trt2lw:IT_m  0.100 -0.005 -0.996 -0.104 -0.011  0.006 -0.004  0.006
BIC(m2, m2_1) #245963
##      df      BIC
## m2   11 246086.5
## m2_1 13 246090.2
anova(m2, m2_1)
## Data: sl
## Models:
## m2: freq ~ trt2 + as.factor(hive) + trialNum + IT_imputed + (1 + 
## m2:     trialNum | colNum)
## m2_1: freq ~ trt2 * IT_imputed + as.factor(hive) + trialNum + (1 + 
## m2_1:     trialNum | colNum)
##      Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)    
## m2   11 245997 246087 -122988   245975                             
## m2_1 13 245985 246090 -122979   245959 16.547      2  0.0002552 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m3 = lmer(freq ~  as.factor(hive) + trialNum + IT_imputed + (1+trialNum|colNum), data = sl)
summary(m3)
## Linear mixed model fit by REML ['lmerMod']
## Formula: freq ~ as.factor(hive) + trialNum + IT_imputed + (1 + trialNum |  
##     colNum)
##    Data: sl
## 
## REML criterion at convergence: 246056.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.0037 -0.5140  0.1670  0.6786  3.7033 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  colNum   (Intercept) 1094.9   33.09         
##           trialNum     113.4   10.65    -0.74
##  Residual             1444.8   38.01         
## Number of obs: 24303, groups:  colNum, 43
## 
## Fixed effects:
##                  Estimate Std. Error t value
## (Intercept)       490.032     47.808  10.250
## as.factor(hive)4  -36.161     18.277  -1.979
## as.factor(hive)5  -37.553     15.503  -2.422
## trialNum            2.765      2.134   1.296
## IT_imputed        -30.858     11.384  -2.711
## 
## Correlation of Fixed Effects:
##             (Intr) as.()4 as.()5 trilNm
## as.fctr(h)4 -0.356                     
## as.fctr(h)5 -0.179  0.761              
## trialNum     0.015 -0.020 -0.029       
## IT_imputed  -0.948  0.111 -0.121 -0.089
anova(m2,m3) # p-val for paper
## refitting model(s) with ML (instead of REML)
## Data: sl
## Models:
## m3: freq ~ as.factor(hive) + trialNum + IT_imputed + (1 + trialNum | 
## m3:     colNum)
## m2: freq ~ trt2 + as.factor(hive) + trialNum + IT_imputed + (1 + 
## m2:     trialNum | colNum)
##    Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)    
## m3  9 246103 246176 -123043   246085                             
## m2 11 245997 246087 -122988   245975 109.61      2  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
AIC(m3) #246074.8
## [1] 246074.8
plot(m2)

qqnorm(ranef(m2)$colNum[[1]])
qqline(ranef(m2)$colNum[[1]])

sjp.lmer(m2) # plot random effects to find any outliers
## `sjp.lmer()` will become deprecated in the future. Please use `plot_model()` instead.
## Plotting random effects...

sjp.lmer(m2, type = 'fe', sort = TRUE, p.kr = FALSE) # plot fixed effects
## Computing p-values via Wald-statistics approximation (treating t as Wald z).

# post-hoc tests -- pvals for paper
summary(glht(m2, linfct = mcp(trt2 = "Tukey")), test = adjusted("none"))
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: lmer(formula = freq ~ trt2 + as.factor(hive) + trialNum + IT_imputed + 
##     (1 + trialNum | colNum), data = sl, REML = FALSE)
## 
## Linear Hypotheses:
##                  Estimate Std. Error z value Pr(>|z|)    
## high - full == 0   10.980      2.286   4.803 1.56e-06 ***
## low - full == 0    17.939      1.910   9.391  < 2e-16 ***
## low - high == 0     6.959      2.962   2.349   0.0188 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- none method)
ggplot(aggdata[aggdata$trialNum <= 2, ], aes(x = trialNum, y = freq)) + 
       geom_point() + 
  facet_wrap(~colNum)

Generate CI’s

# set number of bootstrap samples
nbootSims = 1000

# change hive to factor
sl$hive = as.factor(sl$hive)
m2 = lmer(freq ~ trt2 + hive + trialNum + IT_imputed+  (1+trialNum|colNum), data = sl)
summary(m2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: freq ~ trt2 + hive + trialNum + IT_imputed + (1 + trialNum |  
##     colNum)
##    Data: sl
## 
## REML criterion at convergence: 245941
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.0287 -0.5118  0.1699  0.6810  3.9092 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  colNum   (Intercept)  979.5   31.30         
##           trialNum     113.2   10.64    -0.74
##  Residual             1438.7   37.93         
## Number of obs: 24303, groups:  colNum, 43
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  496.111     45.174  10.982
## trt2high      10.987      2.292   4.795
## trt2low       17.916      1.914   9.360
## hive4        -33.157     17.222  -1.925
## hive5        -39.823     14.612  -2.725
## trialNum       1.578      2.142   0.737
## IT_imputed   -32.627     10.765  -3.031
## 
## Correlation of Fixed Effects:
##            (Intr) trt2hg trt2lw hive4  hive5  trilNm
## trt2high    0.061                                   
## trt2low    -0.013  0.010                            
## hive4      -0.352  0.026  0.003                     
## hive5      -0.175  0.029 -0.031  0.761              
## trialNum    0.007 -0.044 -0.038 -0.021 -0.028       
## IT_imputed -0.948 -0.074  0.016  0.107 -0.123 -0.083
ITmean = mean(tapply(sl$IT_imputed, INDEX = sl$beeCol, FUN = function(x) x[1] ))
# using hive 5, since it's the one with the most data
pframe <- data.frame(trt2 = levels(droplevels(sl$trt2)), hive = factor(5, levels = levels(sl$hive)), IT_imputed = ITmean,  colNum = 99999, trialNum = 2)
pframe$freq <- 0
pp <- predict(m2, newdata = pframe, re.form=NA, type = 'response') # re.form sets all random effects to 0


### Calculate CI's (using bootstrap, not accounting for random effects)
bb2 <- bootMer(m2, FUN=function(x) predict(x, pframe, re.form=NA, type = 'response'), nsim = nbootSims)
bb2_se <-apply(bb2$t,2,function(x) quantile(x, probs = c(0.025, 0.975)))
pframe$blo<-bb2_se[1,]
pframe$bhi<-bb2_se[2,]
pframe$predMean <- pp
pframe <- pframe[, c('trt2', "blo", "bhi", "predMean")]

Make plots for paper

#plot 95% confidence intervals
# "Mean and bootstrap CI based on fixed-effects uncertainty ONLY"
pframe$trt3 = mapvalues(pframe$trt2, from = c("full", "high", "low"), 
                        to = c("Full range\n(220 - 450 Hz)", "High range\n(340 - 390 Hz)", "Low range\n(220 - 330 Hz)"))

pframe
##   trt2      blo      bhi predMean                       trt3
## 1 full 317.7762 333.3892 325.9133 Full range\n(220 - 450 Hz)
## 2 high 328.0518 345.9465 336.9002 High range\n(340 - 390 Hz)
## 3  low 335.5351 352.8245 343.8293  Low range\n(220 - 330 Hz)
g0 <- ggplot(pframe, aes(x=trt3, y=predMean))+
     geom_point()+ 
     labs(y = "Sonication frequency (Hz)", x = "Frequency range for reward") + 
     geom_errorbar(aes(ymin = blo, ymax = bhi), width = 0.1)+
     theme(axis.text.x = element_text(angle = 45, hjust = 1), 
           legend.position = 'none') +
     theme_classic() + 
  annotate(geom="text", x=c(1,2,3), y=c(0, 0, 0) + 355, label=c("a", "b", "c"),
                color="black") 
g0

ggsave(plot = g0, filename = file.path(figDir, "SonicationFreqPredsAndCI.pdf"), width = 5, height = 4)




# this basically gives the same result as bootstrapping
# it's a good double check
ee <- as.data.frame(Effect(c("trt2"), 
                           fixed.predictors =list(given.values = c(hive4 = 0, 
                                                                   hive5 = 1, 
                                                                   trialNum = 2, 
                                                                   IT_imputed = ITmean)),  
                           m2) )

table(sl$hive)
## 
##     3     4     5 
##   513  1047 22743
# compare two methods -- very similar
ee
##   trt2      fit       se    lower    upper
## 1 full 325.9133 4.053780 317.9676 333.8590
## 2 high 336.9002 4.517710 328.0452 345.7552
## 3  low 343.8293 4.274928 335.4502 352.2084
pframe
##   trt2      blo      bhi predMean                       trt3
## 1 full 317.7762 333.3892 325.9133 Full range\n(220 - 450 Hz)
## 2 high 328.0518 345.9465 336.9002 High range\n(340 - 390 Hz)
## 3  low 335.5351 352.8245 343.8293  Low range\n(220 - 330 Hz)
g1 <- ggplot(ee, aes(x=trt2, y=fit))+
     geom_point()+ 
     labs(y = "Sonication Frequency (Hz)", x = "Treatment") + 
     geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.1)+
     theme_classic() + 
    annotate(geom="text", x=c(1,2,3), y=c(0, 0, 0) + 352, label=c("a", "b", "c"),
                color="black") 
g1

Analysis for amplitude

First, convert amplitude (originally measured in Volts) to m/s/s, using the accelerometer calibration

# conversion factor = 10.17 mv/(m/s/s)
sl$amp_acc = (sl$amp * 1000) / 10.17
sl$hive <- as.factor(sl$hive)

maa1 = lmer(amp_acc ~ trt2 + IT_imputed + hive + trialNum  +(1+trialNum|colNum), data = sl)
summary(maa1) 
## Linear mixed model fit by REML ['lmerMod']
## Formula: amp_acc ~ trt2 + IT_imputed + hive + trialNum + (1 + trialNum |  
##     colNum)
##    Data: sl
## 
## REML criterion at convergence: 238287.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.5423 -0.7054 -0.1415  0.6090  4.0749 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  colNum   (Intercept)  153.012 12.370        
##           trialNum       5.702  2.388   -0.44
##  Residual             1053.832 32.463        
## Number of obs: 24303, groups:  colNum, 43
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) -16.6652    24.4325  -0.682
## trt2high     12.3783     1.8688   6.624
## trt2low       4.2633     1.5886   2.684
## IT_imputed   14.1227     5.8215   2.426
## hive4         3.1126     9.2047   0.338
## hive5        12.4595     7.7743   1.603
## trialNum     -0.2630     0.5496  -0.479
## 
## Correlation of Fixed Effects:
##            (Intr) trt2hg trt2lw IT_mpt hive4  hive5 
## trt2high    0.094                                   
## trt2low    -0.019  0.025                            
## IT_imputed -0.952 -0.116  0.023                     
## hive4      -0.337  0.042  0.004  0.095              
## hive5      -0.168  0.045 -0.047 -0.129  0.760       
## trialNum    0.008 -0.085 -0.072 -0.046 -0.010 -0.004
AIC(maa1) 
## [1] 238309.7
# interaction
maa1_1 = lmer(amp_acc ~ trt2*IT_imputed + hive + trialNum  +(1+trialNum|colNum), data = sl)
summary(maa1_1) 
## Linear mixed model fit by REML ['lmerMod']
## Formula: amp_acc ~ trt2 * IT_imputed + hive + trialNum + (1 + trialNum |  
##     colNum)
##    Data: sl
## 
## REML criterion at convergence: 238263.2
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.5495 -0.7049 -0.1419  0.6095  4.0640 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  colNum   (Intercept)  151.558 12.311        
##           trialNum       6.111  2.472   -0.40
##  Residual             1053.234 32.454        
## Number of obs: 24303, groups:  colNum, 43
## 
## Fixed effects:
##                      Estimate Std. Error t value
## (Intercept)           -6.0179    25.0783  -0.240
## trt2high            -115.9726    34.4161  -3.370
## trt2low                7.5758    17.4509   0.434
## IT_imputed            12.2859     5.9564   2.063
## hive4                 -0.6796     9.3372  -0.073
## hive5                  9.2995     7.8863   1.179
## trialNum              -0.1907     0.5706  -0.334
## trt2high:IT_imputed   28.5033     7.6269   3.737
## trt2low:IT_imputed    -0.8155     4.2763  -0.191
## 
## Correlation of Fixed Effects:
##             (Intr) trt2hg trt2lw IT_mpt hive4  hive5  trilNm trt2h:IT_
## trt2high    -0.116                                                    
## trt2low     -0.141  0.011                                             
## IT_imputed  -0.953  0.082  0.147                                      
## hive4       -0.345  0.115  0.015  0.106                               
## hive5       -0.177  0.118 -0.013 -0.116  0.763                        
## trialNum     0.002 -0.062  0.016 -0.036 -0.014 -0.010                 
## trt2hgh:IT_  0.121 -0.999 -0.012 -0.089 -0.113 -0.115  0.057          
## trt2lw:IT_m  0.140 -0.010 -0.996 -0.146 -0.015  0.009 -0.022  0.012
AIC(maa1_1, maa1) 
##        df      AIC
## maa1_1 13 238289.2
## maa1   11 238309.7
anova(maa1_1, maa1) 
## refitting model(s) with ML (instead of REML)
## Data: sl
## Models:
## maa1: amp_acc ~ trt2 + IT_imputed + hive + trialNum + (1 + trialNum | 
## maa1:     colNum)
## maa1_1: amp_acc ~ trt2 * IT_imputed + hive + trialNum + (1 + trialNum | 
## maa1_1:     colNum)
##        Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)   
## maa1   11 238336 238425 -119157   238314                            
## maa1_1 13 238326 238431 -119150   238300 13.792      2   0.001012 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
maa2 = update(maa1, .~. - trt2)
summary(maa2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: amp_acc ~ IT_imputed + hive + trialNum + (1 + trialNum | colNum)
##    Data: sl
## 
## REML criterion at convergence: 238342.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.5697 -0.7021 -0.1420  0.6060  4.0818 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  colNum   (Intercept)  174.797 13.221        
##           trialNum       5.207  2.282   -0.57
##  Residual             1055.931 32.495        
## Number of obs: 24303, groups:  colNum, 43
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) -29.8966    24.3886  -1.226
## IT_imputed   18.3545     5.8093   3.159
## hive4        -0.8024     9.2461  -0.087
## hive5         9.5511     7.8037   1.224
## trialNum      0.3193     0.5112   0.624
## 
## Correlation of Fixed Effects:
##            (Intr) IT_mpt hive4  hive5 
## IT_imputed -0.951                     
## hive4      -0.341  0.099              
## hive5      -0.170 -0.129  0.760       
## trialNum    0.046 -0.095 -0.010 -0.004
AIC(maa2)
## [1] 238360.9
anova(maa1, maa2) # p-value for paper for acceleration
## refitting model(s) with ML (instead of REML)
## Data: sl
## Models:
## maa2: amp_acc ~ IT_imputed + hive + trialNum + (1 + trialNum | colNum)
## maa1: amp_acc ~ trt2 + IT_imputed + hive + trialNum + (1 + trialNum | 
## maa1:     colNum)
##      Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)    
## maa2  9 238381 238454 -119181   238363                             
## maa1 11 238336 238425 -119157   238314 49.207      2  2.064e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# p-value for hive for acceleration
maa3 = update(maa1, .~. -hive)
anova(maa1, maa3)
## refitting model(s) with ML (instead of REML)
## Data: sl
## Models:
## maa3: amp_acc ~ trt2 + IT_imputed + trialNum + (1 + trialNum | colNum)
## maa1: amp_acc ~ trt2 + IT_imputed + hive + trialNum + (1 + trialNum | 
## maa1:     colNum)
##      Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)
## maa3  9 238336 238409 -119159   238318                         
## maa1 11 238336 238425 -119157   238314 4.4596      2     0.1076
# diagnostics
plot(maa1)

qqnorm(ranef(maa1)$colNum[[1]])
qqline(ranef(maa1)$colNum[[1]])

sjp.lmer(maa1) # plot random effects to find any outliers
## Plotting random effects...

sjp.lmer(maa1, type = 'fe', sort = TRUE, p.kr = FALSE) # plot fixed effects
## Computing p-values via Wald-statistics approximation (treating t as Wald z).

# post-hoc tests -- pvals for paper
summary(glht(maa1, linfct = mcp(trt2 = "Tukey")), test = adjusted("none"))
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: lmer(formula = amp_acc ~ trt2 + IT_imputed + hive + trialNum + 
##     (1 + trialNum | colNum), data = sl)
## 
## Linear Hypotheses:
##                  Estimate Std. Error z value Pr(>|z|)    
## high - full == 0   12.378      1.869   6.624 3.51e-11 ***
## low - full == 0     4.263      1.589   2.684 0.007280 ** 
## low - high == 0    -8.115      2.422  -3.350 0.000807 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- none method)

Generate CI’s for amplitude

# set number of bootstrap samples
nbootSims2 = 1000

# using hive 5, since it's the one with the most data
pframe <- data.frame(trt2 = levels(droplevels(sl$trt2)), 
                     hive = factor(5, levels = levels(sl$hive)), 
                     IT_imputed = ITmean,  colNum = 99999, trialNum = 2)
pframe$amp_acc <- 0
pp <- predict(maa1, newdata = pframe, re.form=NA, type = 'response') # re.form sets all random effects to 0


### Calculate CI's (using bootstrap, not accounting for random effects)
bb2 <- bootMer(maa1, FUN=function(x) predict(x, pframe, re.form=NA, type = 'response'), nsim = nbootSims2)
bb2_se <-apply(bb2$t,2,function(x) quantile(x, probs = c(0.025, 0.975)))
pframe$blo<-bb2_se[1,]
pframe$bhi<-bb2_se[2,]
pframe$predMean <- pp
pframe <- pframe[, c('trt2', "blo", "bhi", "predMean")]

Make plots for amplitude for paper

#plot 95% confidence intervals
# "Mean and bootstrap CI based on fixed-effects uncertainty ONLY"
pframe$trt3 = mapvalues(pframe$trt2, from = c("full", "high", "low"), 
                        to = c("Full range\n(220 - 450 Hz)", "High range\n(340 - 390 Hz)", "Low range\n(220 - 330 Hz)"))

pframe
##   trt2      blo      bhi predMean                       trt3
## 1 full 48.87920 57.30688 53.06736 Full range\n(220 - 450 Hz)
## 2 high 60.25571 70.84571 65.44564 High range\n(340 - 390 Hz)
## 3  low 52.20973 62.00369 57.33067  Low range\n(220 - 330 Hz)
ga0 <- ggplot(pframe, aes(x=trt3, y=predMean))+
     geom_point()+ 
     labs( y = expression ("Sonication amplitude "(m~s^{-2})), x = "Frequency range for reward") + 
     geom_errorbar(aes(ymin = blo, ymax = bhi), width = 0.1)+
     theme(axis.text.x = element_text(angle = 45, hjust = 1), 
           legend.position = 'none') +
     theme_classic() + 
  annotate(geom="text", x=c(1,2,3), y=c(0, 0, 0) + 75, label=c("a", "b", "c"),
                color="black") 
ga0

ggsave(plot = ga0, filename = file.path(figDir, "SonicationAmpPredsAndCI.pdf"), width = 5, height = 4)



tapply(sl$amp, INDEX = sl$trt, mean)
##      full    full_2      high       low 
## 0.5237935 0.6284929 0.6886385 0.6059128
# conversion factor = 10.17 mv/(m/s/s)
sl$amp_acc = (sl$amp * 1000) / 10.17
sl$hive <- as.factor(sl$hive)


aggdata <- aggregate(sl$amp_acc, by=list(colNum = sl$colNum,trialNum =sl$trialNum, trt = sl$trt), FUN=mean, na.rm=TRUE)
colnames(aggdata)[colnames(aggdata) == "x"] = "amp"

aggdata_sd <- aggregate(sl$amp_acc, by=list(colNum = sl$colNum,trialNum =sl$trialNum, trt = sl$trt), FUN=sd, na.rm=TRUE)
colnames(aggdata_sd)[colnames(aggdata_sd) == "x"] = "amp_sd"

aggdata = merge(aggdata, aggdata_sd)
aggdata = aggdata[order(aggdata$colNum, aggdata$trialNum, decreasing = FALSE), ]
agg_sm = aggdata[aggdata$trialNum <= 2, ]
rownames(agg_sm) = 1:nrow(agg_sm)
agg_sm
##                colNum trialNum    trt      amp    amp_sd
## 1              blue_4        1   full 40.03641 27.005004
## 2              gold_3        1   full 32.58012 13.395404
## 3           goldred_4        1   full 27.49328  8.575236
## 4             green_4        1   full 44.14520 45.654277
## 5             green_4        2 full_2 19.19985 21.543257
## 6              lime_5        1   full 72.53866 33.940021
## 7          limeblue_5        1   full 49.20279 24.837998
## 8          limeblue_5        2 full_2 59.04592 36.444346
## 9          limegold_5        1   full 34.95839 26.390545
## 10        limegreen_5        1   full 52.56061 32.949195
## 11       limeorange_5        1   full 43.28056 22.966005
## 12       limeorange_5        2 full_2 50.75113 25.907474
## 13         limepink_5        1   full 49.57493 19.711454
## 14       limepurple_5        1   full 43.98249 22.702402
## 15       limepurple_5        2    low 39.95722 19.359774
## 16 limepurpleyellow_5        1   full 34.54496 17.151702
## 17          limered_5        1   full 42.27701 20.963674
## 18          limered_5        2    low 40.63711 20.895896
## 19       limesilver_5        1   full 26.24910  7.336471
## 20        limewhite_5        1   full 37.65233 22.862123
## 21        limewhite_5        2 full_2 40.91118 19.936507
## 22       limeyellow_5        1   full 43.51579 23.156058
## 23           orange_3        1   full 32.25152 18.932874
## 24           orange_5        1   full 55.08235 20.197053
## 25       orangeblue_5        1   full 51.97426 23.301810
## 26      orangegreen_5        1   full 58.59866 33.410963
## 27       orangepink_5        1   full 50.98635 28.370137
## 28     orangepurple_5        1   full 34.53312 16.739467
## 29           purple_3        1   full 53.61466 21.935890
## 30          redblue_4        1   full 28.88506 11.312274
## 31          redblue_4        2 full_2 48.61796 25.277613
## 32         redgreen_5        1   full 78.72927 46.273733
## 33         redgreen_5        2 full_2 70.38830 47.080737
## 34          redpink_5        1   full 74.20294 37.536798
## 35          redpink_5        2    low 56.85081 31.951114
## 36        redpurple_5        1   full 58.06343 40.818616
## 37        redpurple_5        2   high 53.13637 26.813039
## 38           silver_5        1   full 46.34517 30.280907
## 39            white_4        1   full 47.76100 21.162920
## 40            white_4        2 full_2 46.45015 22.564532
## 41        whiteblue_5        1   full 66.11499 33.532056
## 42        whiteblue_5        2    low 67.00735 29.330027
## 43        whitegold_5        1   full 43.10847 26.148044
## 44        whitegold_5        2    low 45.20369 27.241996
## 45       whitegreen_4        1   full 38.87762 18.184245
## 46      whiteorange_5        1   full 61.63802 39.835205
## 47      whiteorange_5        2    low 45.85399 25.838373
## 48        whitepink_5        1   full 63.53760 35.030181
## 49        whitepink_5        2   high 80.19250 47.237615
## 50      whitepurple_5        1   full 47.77247 35.753931
## 51         whitered_5        1   full 48.37233 25.860700
## 52         whitered_5        2   high 74.79446 45.764818
## 53      whiteyellow_5        1   full 65.50425 32.808944
## 54      whiteyellow_5        2 full_2 56.00923 33.372943
## 55       yellowblue_5        1   full 52.45834 30.688932
## 56       yellowblue_5        2   high 79.36156 41.580725
## 57      yellowgreen_5        1   full 61.58710 33.835105
## 58      yellowgreen_5        2 full_2 79.18968 27.697874
## 59     yelloworange_5        1   full 73.32616 36.109449
## 60     yelloworange_5        2   high 54.48161 24.329362
## 61       yellowpink_5        1   full 52.74199 24.869065
## 62       yellowpink_5        2    low 57.76271 29.939244
## 63     yellowpurple_5        1   full 62.77375 29.437162
## 64     yellowpurple_5        2    low 68.98565 35.865129
## 65        yellowred_5        1   full 62.77866 39.753892
## 66        yellowred_5        2 full_2 47.97187 28.376661
agg_sm[duplicated(data.frame(agg_sm$colNum, agg_sm$trialNum)), ]
## [1] colNum   trialNum trt      amp      amp_sd  
## <0 rows> (or 0-length row.names)
ggplot(agg_sm, aes(x = trt, y = amp, fill = trialNum > 1)) +
  geom_boxplot(alpha = 0.2) + 
  geom_point(aes(color = trialNum>1)) + 
  geom_line(aes(group = colNum))

diffdf <- sapply(unique(agg_sm$colNum), FUN = function(x){
  tmp = agg_sm[agg_sm$colNum == x, ]
  if(nrow(tmp) <= 1)
    diff = NA
  else
    diff = tmp$amp[tmp$trialNum == 2] - tmp$amp[tmp$trialNum == 1]
  return(diff)
})

trtDF = sapply(unique(agg_sm$colNum), FUN = function(x){
  tmp = agg_sm[agg_sm$colNum == x, ]
  ttrs = paste(tmp$trt[tmp$trialNum == 1], tmp$trt[tmp$trialNum == 2], sep = "_")
  return(ttrs)
})

buzzdiffs = data.frame(trtDF, diffdf)

ggplot(buzzdiffs, aes(x = trtDF, y= diffdf)) + 
  geom_boxplot() + 
  geom_point()
## Warning: Removed 20 rows containing non-finite values (stat_boxplot).
## Warning: Removed 20 rows containing missing values (geom_point).

agg2 = aggregate(sl$amp_acc, by=list(colNum = sl$colNum, fullTrt = sl$trt == "full", trt = sl$trt), FUN=mean, na.rm=TRUE)
colnames(agg2)[colnames(agg2) == "x"] = "amp"
agg2$trt = as.character(agg2$trt)

diffdf <- t(as.data.frame(t(sapply(unique(agg2$colNum), FUN = function(x){
  tmp = agg2[agg2$colNum == x, ]
  if(nrow(tmp) <= 1)
    return(NA)
  if (length(unique(tmp$trt)) > 2){
    tmp = tmp[tmp$trt != "full_2", ]
  }
  diff =  tmp$amp[!tmp$fullTrt] - tmp$amp[tmp$fullTrt]
  return(diff)

}))))

length(diffdf)
## [1] 43
trtDF = sapply(unique(agg2$colNum), FUN = function(x){
  tmp = agg2[agg2$colNum == x, ]
  if (length(unique(tmp$trt)) > 2){
    tmp = tmp[tmp$trt != "full_2", ]
  }
  ttrs = paste(tmp$trt[tmp$fullTrt], tmp$trt[!tmp$fullTrt], sep = "_")
  return(ttrs)
})

length(trtDF)
## [1] 43
buzzdiffs = data.frame(trtDF, diffdf)
tapply(buzzdiffs$diffdf, INDEX = buzzdiffs$trtDF, mean)
##       full_ full_full_2   full_high    full_low 
##          NA    2.871714    9.194234    3.249068
ggplot(droplevels(buzzdiffs[buzzdiffs$trtDF != "full_", ]), aes(x = trtDF, y= as.numeric(diffdf))) + 
  geom_boxplot() + 
  geom_point()

refref todo: Plot amplitude vs. frequency for initial trial, accounting for size

ggplot(sl[sl$trialNum == 1, ], aes(x = freq, y = amp_acc)) + 
  geom_point(position = position_jitter(width = 3), alpha = 0.2) + 
  theme_classic() + 
  geom_smooth(aes(group = trt2)) + 
  labs( y = expression ("Sonication amplitude "(m~s^{-2})), x = "Frequency (Hz)")
## `geom_smooth()` using method = 'gam'